# function to simulate and summarize quantities of interest
# 	built for cvdm paper
# by Fabien Cottier, June 2018

# Model should a regression model
# if lag=T, then regression is carried using t-1 variables
# supports both logit and negbin regression


#	ONLY INTERACTION MODELS!!!


qi_inter <- function(model,lag=FALSE){

	# packages required
	require(MASS)
	require(arm)
	require(fastDummies)
	source("Rscripts/extractQI.R") # summarize quantities of interest

	# extract class of model and link function
	regM <- class(model)[1]
	if (regM %in% c("bife","clogit")){
		lfun <- "logit"
	} else {
		lfun <- family(model)$link
	}


	# matrix to store point estimate and confidence interval
	apd_matrix <- matrix(0,nrow=9,ncol=6,dimnames=list(c(
	    "Fully affected, Floods","Fully affected, Displ.","diff-in-diffs",
	    "Partially affected, Floods","Partially affected, Displ.","diff-in-diffs",
	    "Proximate, Floods","Proximate, Displ.","diff-in-diffs"
	    ),
	   c("mean","median",".025lb",".05lb",".95ub",".975ub")))


	# matrix to store point estimate and confidence interval
	apd_matrix <- matrix(0,nrow=9,ncol=6,dimnames=list(c(
	    "Fully affected, Floods","Fully affected, Displ.","diff-in-diffs",
	    "Partially affected, Floods","Partially affected, Displ.","diff-in-diffs",
	    "Proximate, Floods","Proximate, Displ.","diff-in-diffs"
	    ),
	   c("mean","median",".025lb",".05lb",".95ub",".975ub")))


	# floods and displacement varnames // variables names adjusted for simulations with lag variables
	varlist<-c("floodpw","floodnpw","floodnear","iterm2_pw","iterm2_npw","iterm2_near","DIDP_1000pw","DIDP_1000npw","DIDP_1000near","iterm1_pw","iterm1_npw","iterm1_near")
	for (i in 1:length(varlist)){
		assign(
			varlist[i],
			ifelse(
				lag==FALSE,
				varlist[i],
				paste(varlist[i],
					"_1tl",
					sep=""
				)
			)
		)
	}


	# Get matrix of simulated parameters
	set.seed(1234)
	if (regM == "bife"){
		coefV <- c(model$par_corr$beta,model$par_corr$alpha)
		vcovM <- matrix(0,length(coefV),length(coefV))
		vcovM[1:length(model$par_corr$beta),1:length(model$par_corr$beta)] <- model$par_corr$beta_vcov
		diag(vcovM[(length(model$par_corr$beta)+1):length(coefV),(length(model$par_corr$beta)+1):length(coefV)]) <- (model$par_corr$se_alpha)^2
		s.model <- MASS::mvrnorm(1000,coefV,vcovM)
	} else if (regM == "clogit"){
		s.model <- MASS::mvrnorm(1000,coef(model),vcov(model))
	} else if (regM == "negbin"){
		s.model <- MASS::mvrnorm(1000,coef(model),vcovHC(model,"const"))
	} else {
		s.model <- MASS::mvrnorm(1000,coef(model),vcovHC(model,type="HC0"))
	}

	# extract model matrix
	if (regM == "bife"){
		X <- model$model_info$X
		colnames(X) <- model$model_info$str_name
		Alpha<-fastDummies::dummy_cols(model$model_info$id)[,2:(length(model$par_corr$alpha)+1)]
		MatX <- cbind(X,Alpha)
	} else {
		MatX <- model.matrix(model)
	}

	# first differences: fully affected admin unit
	model.data_pw <- MatX
	colnames(model.data_pw)
	model.data_pw <- model.data_pw[which(model.data_pw[,"idp_ra"]==1),]
	model.data_pw <- model.data_pw[which(model.data_pw[,floodpw]==1),]
	model.data_pw_cf1 <- model.data_pw
	model.data_pw_cf2 <- model.data_pw
	#
	model.data_pw[,which(colnames(model.data_pw)==floodpw)]<-0
	model.data_pw[,which(colnames(model.data_pw)==iterm2_pw)]<-0
	model.data_pw[,which(colnames(model.data_pw)==DIDP_1000pw)]<-0
	model.data_pw[,which(colnames(model.data_pw)==iterm1_pw)]<-0
	#
	model.data_pw_cf1[,which(colnames(model.data_pw_cf1)==floodpw)]<-1
	model.data_pw_cf1[,which(colnames(model.data_pw_cf1)==iterm2_pw)]<-1
	model.data_pw_cf1[,which(colnames(model.data_pw_cf1)==DIDP_1000pw)]<-0
	model.data_pw_cf1[,which(colnames(model.data_pw_cf1)==iterm1_pw)]<-0
	#
	model.data_pw_cf2[,which(colnames(model.data_pw_cf2)==floodpw)]<-1
	model.data_pw_cf2[,which(colnames(model.data_pw_cf2)==iterm2_pw)]<-1
	model.data_pw_cf2[,which(colnames(model.data_pw_cf2)==DIDP_1000pw)]<-1
	model.data_pw_cf2[,which(colnames(model.data_pw_cf2)==iterm1_pw)]<-1

	# extract quantities of interest
	p.model_pw<-array(0,dim=c(1000,3))
	for (i in 1:1000){
	  X1 <- expression(s.model[i,]%*%t(model.data_pw))
	  X2 <- expression(s.model[i,]%*%t(model.data_pw_cf1))
	  X3 <- expression(s.model[i,]%*%t(model.data_pw_cf2))
	  p.model_pw[i,1] <- mean(switch(lfun,logit=invlogit(eval(X1)),log=exp(eval(X1))))
	  p.model_pw[i,2] <- mean(switch(lfun,logit=invlogit(eval(X2)),log=exp(eval(X2))))
	  p.model_pw[i,3] <- mean(switch(lfun,logit=invlogit(eval(X3)),log=exp(eval(X3))))
	}
	# average predicted difference:
	apd_matrix[1,] <- extractQI(vector=p.model_pw[,2]-p.model_pw[,1])
	apd_matrix[2,] <- extractQI(vector=p.model_pw[,3]-p.model_pw[,1])
	apd_matrix[3,] <- extractQI(vector=p.model_pw[,3]-p.model_pw[,2])
	apd_matrix[1:3,]

	



	# first differences:  partially affected admin unit
	model.data_npw <- MatX
	colnames(model.data_npw)
	model.data_npw <- model.data_npw[which(model.data_npw[,"idp_ra"]==1),]
	model.data_npw <- model.data_npw[which(model.data_npw[,floodnpw]==1),]
	model.data_npw_cf1 <- model.data_npw
	model.data_npw_cf2 <- model.data_npw
	#
	model.data_npw[,which(colnames(model.data_npw)==floodnpw)]<-0
	model.data_npw[,which(colnames(model.data_npw)==iterm2_npw)]<-0
	model.data_npw[,which(colnames(model.data_npw)==DIDP_1000npw)]<-0
	model.data_npw[,which(colnames(model.data_npw)==iterm1_npw)]<-0
	#
	model.data_npw_cf1[,which(colnames(model.data_npw_cf1)==floodnpw)]<-1
	model.data_npw_cf1[,which(colnames(model.data_npw_cf1)==iterm2_npw)]<-1
	model.data_npw_cf1[,which(colnames(model.data_npw_cf1)==DIDP_1000npw)]<-0
	model.data_npw_cf1[,which(colnames(model.data_npw_cf1)==iterm1_npw)]<-0
	#
	model.data_npw_cf2[,which(colnames(model.data_npw_cf2)==floodnpw)]<-1
	model.data_npw_cf2[,which(colnames(model.data_npw_cf2)==iterm2_npw)]<-1
	model.data_npw_cf2[,which(colnames(model.data_npw_cf2)==DIDP_1000npw)]<-1
	model.data_npw_cf2[,which(colnames(model.data_npw_cf2)==iterm1_npw)]<-1

	# extract quantities of interest
	p.model_npw<-array(0,dim=c(1000,3))
	for (i in 1:1000){
	  X1 <- expression(s.model[i,]%*%t(model.data_npw))
	  X2 <- expression(s.model[i,]%*%t(model.data_npw_cf1))
	  X3 <- expression(s.model[i,]%*%t(model.data_npw_cf2))
	  p.model_npw[i,1] <-  mean(switch(lfun,logit=invlogit(eval(X1)),log=exp(eval(X1))))
	  p.model_npw[i,2] <-  mean(switch(lfun,logit=invlogit(eval(X2)),log=exp(eval(X2))))
	  p.model_npw[i,3] <-  mean(switch(lfun,logit=invlogit(eval(X3)),log=exp(eval(X3))))
	}
	# average predicted difference
	apd_matrix[4,] <- extractQI(vector=p.model_npw[,2]-p.model_npw[,1])
	apd_matrix[5,] <- extractQI(vector=p.model_npw[,3]-p.model_npw[,1])
	apd_matrix[6,] <- extractQI(vector=p.model_npw[,3]-p.model_npw[,2])
	apd_matrix[4:6,]




	# first differences: proximate affected admin unit
	model.data_near <- MatX
	colnames(model.data_near)
	model.data_near <- model.data_near[which(model.data_near[,"idp_ra"]==1),]
	model.data_near <- model.data_near[which(model.data_near[,floodnear]==1),]
	model.data_near_cf1 <- model.data_near
	model.data_near_cf2 <- model.data_near
	#
	model.data_near[,which(colnames(model.data_near)==floodnear)]<-0
	model.data_near[,which(colnames(model.data_near)==iterm2_near)]<-0
	model.data_near[,which(colnames(model.data_near)==DIDP_1000near)]<-0
	model.data_near[,which(colnames(model.data_near)==iterm1_near)]<-0
	#
	model.data_near_cf1[,which(colnames(model.data_near_cf1)==floodnear)]<-1
	model.data_near_cf1[,which(colnames(model.data_near_cf1)==iterm2_near)]<-1
	model.data_near_cf1[,which(colnames(model.data_near_cf1)==DIDP_1000near)]<-0
	model.data_near_cf1[,which(colnames(model.data_near_cf1)==iterm1_near)]<-0
	#
	model.data_near_cf2[,which(colnames(model.data_near_cf2)==floodnear)]<-1
	model.data_near_cf2[,which(colnames(model.data_near_cf2)==iterm2_near)]<-1
	model.data_near_cf2[,which(colnames(model.data_near_cf2)==DIDP_1000near)]<-1
	model.data_near_cf2[,which(colnames(model.data_near_cf2)==iterm1_near)]<-1

	# extract quantities of interest
	p.model_near<-array(0,dim=c(1000,3))
	for (i in 1:1000){
	  X1 <- expression(s.model[i,]%*%t(model.data_near))
	  X2 <- expression(s.model[i,]%*%t(model.data_near_cf1))
	  X3 <- expression(s.model[i,]%*%t(model.data_near_cf2))
	  p.model_near[i,1] <- mean(switch(lfun,logit=arm::invlogit(eval(X1)),log=exp(eval(X1))))
	  p.model_near[i,2] <- mean(switch(lfun,logit=arm::invlogit(eval(X2)),log=exp(eval(X2))))
	  p.model_near[i,3] <- mean(switch(lfun,logit=arm::invlogit(eval(X3)),log=exp(eval(X3))))
	}
	# average predicted difference
	apd_matrix[7,] <- extractQI(vector=p.model_near[,2]-p.model_near[,1])
	apd_matrix[8,] <- extractQI(vector=p.model_near[,3]-p.model_near[,1])
	apd_matrix[9,] <- extractQI(vector=p.model_near[,3]-p.model_near[,2])
	apd_matrix[7:9,]



	# return matrix of QI
	return(apd_matrix)

}